/***
This do-file creates the table 'Association Between Rent and Changes in 
Business Revenue and Employment'. 
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Small Business Revenue"

* Create required subfolders
cap mkdir "${root}/results/Small Business Revenue"

*-------------------------------------------------------------------------------
* Panel A
*-------------------------------------------------------------------------------

project, uses("${root}/data/web/data/Womply - ZCTA - 2020.csv")
import delimited  "${root}/data/web/data/Womply - ZCTA - 2020.csv", clear

gen revenue_change_100 = 100 * revenue_all_apr2020

* Add covars
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", keep(1 3) nogen keepusing(pop_2014_2018_est medhhinc_2014_2018_est med_2br_2014_2018_est zarealand)
gen zarealand_sqm = zarealand/ (1609.37 ^ 2)
drop zarealand

rename (pop_2014_2018_est medhhinc_2014_2018_est med_2br_2014_2018_est) (pop_2018 medhhinc_2018 med_2br_2018) 

project, uses("${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta")
merge m:1 zcta using "${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta", keep(1 3) nogen keepusing(log_density_high_pay_jobs)

* Gen population density by ZCTA
gen pop_conc_zcta = pop_2018 / zarealand_sqm

* Put population density on a log scale
gen l_pop_conc_zcta = log(pop_conc_zcta)

* Merge counties (for SEs)
ren zcta zcta5
preserve
	project, uses("${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt")
	import delimited "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt", encoding(ISO-8859-2) clear
	sort zcta5 poppt
	bysort zcta5 (poppt): keep if _n == _N // only keeps the county where most of the zcta lives
	gen county_fips = 1000 * state + county
	keep zcta5 county_fips
	tempfile zcta_county_CW
	save `zcta_county_CW'
restore
merge m:1 zcta5 using `zcta_county_CW', keep(1 3) nogen

* Column 1
reg revenue_change_100 med_2br_2018 [w = pop_2018], vce(cluster county_fips)
		local panA1_betarent = _b[med_2br_2018] * 1000
		local panA1_serent = _se[med_2br_2018] * 1000
		local panA1_betadens = .
		local panA1_sedens = .
		local panA1_obs = e(N)
		
* Column 2 - add county FEs
areg revenue_change_100 med_2br_2018 [w = pop_2018], absorb(county_fips) cluster(county_fips)
		local panA2_betarent = _b[med_2br_2018] * 1000
		local panA2_serent = _se[med_2br_2018] * 1000
		local panA2_betadens = .
		local panA2_sedens = .
		local panA2_obs = e(N)
				
* Column 3 - add control for density of high wage workers
areg revenue_change_100 med_2br_2018 log_density_high_pay_jobs [w = pop_2018], absorb(county_fips) cluster(county_fips)
		local panA3_betarent = _b[med_2br_2018] * 1000
		local panA3_serent = _se[med_2br_2018] * 1000
		local panA3_betadens = _b[log_density_high_pay_jobs]
		local panA3_sedens = _se[log_density_high_pay_jobs]
		local panA3_obs = e(N)

*-------------------------------------------------------------------------------
* Panel B
*-------------------------------------------------------------------------------

* Load employment data
project, uses("${root}/data/web/data/Employment - County - Weekly.csv")
import delimited "${root}/data/web/data/Employment - County - Weekly.csv", clear

gen date = mdy(month, day_endofweek, year)

rename emp_incq1 emp_index

* Make a percentage change
replace emp_index = 100 * emp_index

* Take mean of month
gegen temp = mean(emp_index) if year == 2020 & month == 4, by(countyfips)
	gegen change_apr_2020 = mean(temp), by(countyfips)
	drop temp	
	
gegen temp = mean(emp_index) if year == 2020 & month == 5, by(countyfips)
	gegen change_may_2020 = mean(temp), by(countyfips)
	drop temp	
	
gegen temp = mean(emp_index) if year == 2020 & month == 6, by(countyfips)
	gegen change_jun_2020 = mean(temp), by(countyfips)
	drop temp	
	
gegen temp = mean(emp_index) if year == 2020 & month == 7, by(countyfips)
	gegen change_jul_2020 = mean(temp), by(countyfips)
	drop temp	

* Merge county-level covariates 
rename countyfips county_fips
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
merge m:1 county_fips using "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", keepusing(med_2br_2014_2018_est pop_2014_2018_est) keep(3) nogen 
rename county_fips countyfips
rename (med_2br_2014_2018_est pop_2014_2018_est) (median_2br_rent_2018 pop_2018)

gcollapse change_* median_2br_rent_2018 pop_2018, by(countyfips) 

* Calculate density of high wage workers at the county level

preserve
	project, uses("${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt")
	import delimited "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt", encoding(ISO-8859-2) clear
	sort zcta5 poppt
	bysort zcta5 (poppt): keep if _n == _N // only keeps the county where most of the zcta lives
	gen county_fips = 1000 * state + county
	keep zcta5 county_fips
	tempfile zcta_county_CW
	save `zcta_county_CW'
restore

preserve 
project, uses("${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta")
use "${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta", clear

rename zcta zcta5 
merge m:1 zcta5 using `zcta_county_CW', keep(3 1) nogen

gcollapse (mean) density_high_pay_jobs, by(county_fips)

rename county_fips countyfips

tempfile dens_q4
save `dens_q4'
restore

merge m:1 countyfips using `dens_q4', keep(1 3)

gen log_density_high_pay_jobs = log(density_high_pay_jobs)

reg change_jul_2020 median_2br_rent_2018 [w = pop_2018], cluster(countyfips)
local panB1_betarent = _b[median_2br_rent_2018] * 1000
local panB1_serent = _se[median_2br_rent_2018] * 1000
local panB1_betadens = .
local panB1_sedens = .
local panB1_obs = e(N)

reg change_jul_2020 median_2br_rent_2018 log_density_high_pay_jobs [w = pop_2018], cluster(countyfips)
local panB2_betarent = _b[median_2br_rent_2018] * 1000
local panB2_serent = _se[median_2br_rent_2018] * 1000
local panB2_betadens = _b[log_density_high_pay_jobs]
local panB2_sedens = _se[log_density_high_pay_jobs]
local panB2_obs = e(N)

* Load ZIP-level employment data
project, uses("${root}/data/web/data/Earnin - ZCTA - 2020.csv")
import delimited "${root}/data/web/data/Earnin - ZCTA - 2020.csv", clear

* Make a percentage change
gen zcta_empq1_july2020 = -100 + 100 * emp_incq1_jul2020

* Merge counties (for SEs)
ren zcta zcta5
preserve
	project, uses("${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt")
	import delimited "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt", encoding(ISO-8859-2) clear
	sort zcta5 poppt
	bysort zcta5 (poppt): keep if _n == _N // only keeps the county where most of the zcta lives
	gen county_fips = 1000 * state + county
	keep zcta5 county_fips
	tempfile zcta_county_CW
	save `zcta_county_CW'
restore

merge m:1 zcta5 using `zcta_county_CW', keep(1 3 ) nogen
ren zcta5 zcta 

project, uses("${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta")
merge m:1 zcta using "${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta", nogen keep(1 3)

* Merge ZCTA-level covariates
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", keepusing(med_2br_2014_2018_est pop_2014_2018_est) keep(3) nogen 

ren (med_2br_2014_2018_est pop_2014_2018_est) (median_2br_rent_2018 pop_2018)
gcollapse zcta_empq1_july2020 median_2br_rent_2018 pop_2018 density_high_pay_jobs, by(zcta county_fips) 
gen log_density_high_pay_jobs = log(density_high_pay_jobs)

areg zcta_empq1_july2020 median_2br_rent_2018 log_density_high_pay_jobs [w = pop_2018], absorb(county_fips) cluster(county_fips)
local panB3_betarent = _b[median_2br_rent_2018] * 1000
local panB3_serent = _se[median_2br_rent_2018] * 1000
local panB3_betadens = _b[log_density_high_pay_jobs]
local panB3_sedens = _se[log_density_high_pay_jobs]
local panB3_obs = e(N)

*-------------------------------------------------------------------------------
* Export table
*-------------------------------------------------------------------------------

clear 
set obs 100

forv col = 1/3 {
	gen col`col' = .
	replace col`col' = `panA`col'_betarent' if _n == 1
	replace col`col' = `panA`col'_serent' if _n == 2
	replace col`col' = `panA`col'_betadens' if _n == 3
	replace col`col' = `panA`col'_sedens' if _n == 4
	replace col`col' = `panA`col'_obs' if _n == 5
	
	replace col`col' = `panB`col'_betarent' if _n == 6
	replace col`col' = `panB`col'_serent' if _n == 7
	replace col`col' = `panB`col'_betadens' if _n == 8
	replace col`col' = `panB`col'_sedens' if _n == 9
	replace col`col' = `panB`col'_obs' if _n == 10
}

* Export
export excel "${root}/results/new_table_1.xlsx", sheet(new_table_1, replace)
project, creates("${root}/results/new_table_1.xlsx")
